********************************
*This program sets up variables for HILDA analysis 
********************************
* Working directory and root path used by every file reference below.
* Replace both placeholders with the project folder before running.
cd "<place directory here>"
global path "<place directory here>"
global data "$path\data" //this is where you save the clean data


* Start from an empty session: drop data, matrices and Mata objects,
* then raise the variable limit before the raw file is loaded.
clear
clear matrix 
clear mata
set maxvar 100000

use "$path\rawdata\UnbalancedHH.dta" , clear 

* The commented-out block below is the one-off step that produced hsvalui.dta:
* the mean of hsvalui by hhssa4 and year, plus a decile of that mean.
* NOTE(review): the keep-if condition joins its two tests with "|". Because
* Stata treats missing as larger than any number, "hsvalui>0" is already true
* for missing values, so the condition is true for every row and nothing is
* dropped. "&" was probably intended -- confirm before re-running this step.
* NOTE(review): the save has no replace option, so a re-run fails if the file exists.
/*
use "$path\rawdata\UnbalancedHH.dta" , clear 

keep if hsvalui>0 | hsvalui!=. 

collapse(mean) hsvalui, by(hhssa4 year)
gen hsvalue_sa4_mean=hsvalui 
xtile hprice_decile= hsvalui, n(10)

save "$path\rawdata\hsvalui.dta"
*/
* Attach the area-by-year values to each record (many records per hhssa4-year).
* The merge leaves its _merge indicator in the data; it is not inspected here.
merge m:1 hhssa4 year using "$path\rawdata\hsvalui.dta"

* Declare the panel on a numeric id built from xwaveid.
egen id = group(xwaveid)
tsset id year 

* Convert xwaveid to numeric; later sections declare the panel on xwaveid directly.
destring xwaveid, replace

*financial stress variable
* One 0/1 indicator per hardship item: 1 when the source item equals 1,
* 0 otherwise (missing and any other code therefore count as 0).
g billprob = (fiprbeg ==1)
g askedhelp = (fiprbfh==1) 
g askedwelfare = (fiprbwo==1)
g soldsomething = (fiprbps==1)
g heathome = (fiprbuh==1)
g withoutmeal = (fiprbwm == 1)

g mortproblem = (fiprbmr==1)

* Overwrite with fiprmr wherever that variable is coded 0 or 1.
* NOTE(review): fiprmr differs from fiprbmr by a single letter -- confirm it is
* a separate variable in the raw file and not a typo.
replace mortproblem = 1 if fiprmr==1
replace mortproblem = 0 if fiprmr==0

* Number of hardship items reported (0 to 7). mortproblem is written out in
* full: the previous abbreviation (mortprob) only resolved through Stata's
* variable-abbreviation feature, which fails when varabbrev is off or when
* another variable with the same prefix exists.
g sfinstress = billprob  + askedhelp + askedwelfare + soldsomething + heathome + withoutmeal + mortproblem
* Financially stressed: more than two items reported.
g finstress = (sfinstress > 2)	

*Household characteristics
* unemp is 1 when esbrd equals 2; negative codes of esbrd become missing.
gen unemp = cond(esbrd < 0, ., esbrd == 2)

* moved is 1 when hhmove equals 1; negative codes of hhmove become missing.
gen moved = cond(hhmove < 0, ., hhmove == 1)

*Expected to move
* Copy of mhnyr with negative codes blanked; expmover flags values 1 or 2.
gen expmove = cond(mhnyr < 0, ., mhnyr)
gen expmover = inlist(expmove, 1, 2)

* Income measure: hifditp minus hifditn, floored at zero, and its log
* (the log is missing where the floored value is zero).
gen dinc = cond(hifditp - hifditn < 0, 0, hifditp - hifditn)
gen ldinc = ln(dinc)

gen age = hgage
gen agesq = age*age

* Employment-status indicators built from esempst; selfemp is left missing
* where esempst is system missing, employee is not.
gen selfemp = (esempst > 1) if esempst != .
gen employee = esempst == 1

gen casual = jbcasab == 1

* underemp is 1 when jbhrcpr equals 3, forced to 0 for anyone flagged unemp.
gen underemp = cond(unemp == 1, 0, jbhrcpr == 3)

gen super_retiree = hwsupri 

* retiree: 1 when hwsupri is above zero, 0 otherwise, missing when hwsupri is
* system missing.
gen retiree = cond(hwsupri == ., ., hwsupri > 0)


//health
* 1 when helth equals 1, 0 otherwise.
gen health_lt= (helth==1)


//education 
* 1 when edhigh1 is 1, 2 or 3.
gen tert_edu= (edhigh1==1| edhigh1==2|edhigh1==3)

//investor if second mortgage>0
* Stata treats missing as larger than any number, so an unguarded
* (hsslowe>0) is 1 whenever hsslowe is missing. The flag is now left missing
* in that case instead of silently marking the household as an investor.
gen investor= (hsslowe>0) if !missing(hsslowe)

// housing tenure 
gen renter=0
replace renter=1 if hstenr==2

gen owner=0 //could be outright or indebted 
replace owner=1 if hstenr==1

* Negative codes of the amount owed are treated as zero owed.
replace hsmgowe=0 if hsmgowe<0
* NOTE(review): an owner whose hsmgowe is missing also satisfies hsmgowe>0 and
* is therefore classed as a mortgagor (and not as an outright owner). This is
* left unchanged because the pay-off section further down derives events from
* mortgagor switching to 0 -- confirm whether missing should be handled here.
gen mortgagor=0
replace mortgagor=1 if owner==1 & hsmgowe>0

gen outright_owner=0
replace outright_owner=1 if mortgagor==0 &owner==1



*weights
* Two copies of the raw weights, plus a combined weight that takes hhwth and
* falls back to hhwthm wherever hhwth is exactly zero.
gen hwt_noadj = hhwth
gen hwt_adj = hhwthm 

gen hwt_new = cond(hwt_noadj == 0, hwt_adj, hwt_noadj)


*Wealth variables
g bank = hwtbani
g shares = hweqini
g bonds = hwcaini
g cash = hwcaini

* bonds and cash are both copies of hwcaini, so adding the two counted that
* balance twice. It now enters the liquid-asset total once.
* NOTE(review): if bonds was meant to come from a different raw variable,
* point it at that variable and add it back to the sum.
g liqassets = bank + shares + cash

g ccdebt = hwccdti
g pdebt = hwothdi
g busdebt = hwbusdi
g propdebt= hwtpdi

* Only credit-card debt is netted off liquid assets.
g liq_debt = ccdebt 

g liqnw = liqassets - liq_debt

g assets = hwassei
g debt = hwdebti

g house_equity = hwtpeip - hwtpein

g hdebt1 = hsmgowe //approximate amount outstanding on home loans 
replace hdebt1 = 0 if hdebt1 < 0

g hdebt = hwdebti  //household debts 
* Log is missing wherever hdebt is zero or negative.
g lhdebt = log(hdebt)

g super = hwsupei
g super_ret = hwsupri
g super_nonret = hwsupwi

g life = hwinsui

g trusts = hwtrusi

* Illiquid net worth: housing equity plus life insurance plus superannuation;
* its log is missing wherever the total is zero or negative.
g illiqnw = house_equity + life + super
gen ln_illiqnw=log(illiqnw)

** retirement 
gen retired_year=rtyr if rtyr>0 

*rtage is age retired/intends to retire. the question is asked for population aged 45+ 

* Negative codes of rtage used to satisfy rtage<=age and were classed as
* retired, and a missing rtage or age satisfied the comparisons as well
* (missing is larger than any number in Stata). Only positive, non-missing
* values are compared now; everything else stays missing unless age<=45.
gen retired=.
replace retired=1 if rtage>0 & rtage<=age & !missing(age)
replace retired=0 if rtage>age & !missing(rtage)
replace retired=0 if age<=45

** job loss 

* fired: 0 when lefrd equals 1, 1 when lefrd equals 2, missing otherwise.
gen fired=0 if lefrd==1
replace fired=1 if lefrd==2

* Reported chance of job loss, kept on its 0-100 scale; out-of-range codes dropped.
gen firedprob=jbmploj if jbmploj>=0 & jbmploj<=100

* firedprob is measured on 0-100, so the previous cut-off of .3 flagged any
* chance above 0.3 as expected. The cut-off is now written on the same scale
* as the variable (above 30).
* NOTE(review): confirm that 30 percent is the intended threshold.
gen expected = (firedprob > 30) if firedprob != .

* unexp is the complement of expected (missing where expected is missing).
gen unexp = 1 - expected

* Job loss: unemployed now and not unemployed in the previous year.
xtset xwaveid year
g jobloss = (unemp == 1 & l1.unemp == 0)

* Unexpected job loss: a job loss that was flagged as unexpected a year earlier.
gen unexpected_jobloss=0
replace unexpected_jobloss=1 if jobloss==1 & l1.unexp==1



** hours worked 
gen hours_worked= jbhruc if jbhruc>0 
gen hours_worked_prefer=jbtprhr if jbtprhr>0

*********************
*Liquidity Buffer 
*************************
* Liquid assets relative to dinc/12 scaled as in the original expression;
* missing where dinc is zero.
gen liqratio = liqassets/dinc*12
sum liqratio, detail

* Blank values above the 99th percentile, then floor negatives at zero.
egen trim_outlier = pctile(liqratio), p(99)
replace liqratio = cond(liqratio > trim_outlier, ., cond(liqratio < 0, 0, liqratio))

*buffers by quintile 
* Quintiles of dinc computed separately within each of five years
* (2002, 2006, 2010, 2014, 2018), then combined into one variable.
local q = 0
forvalues yr = 2002(4)2018 {
    local ++q
    xtile buffer_quintile_q`q' = dinc if year == `yr', n(5)
}
gen buffer_quintile = max(buffer_quintile_q1, buffer_quintile_q2, buffer_quintile_q3, buffer_quintile_q4, buffer_quintile_q5)



*******************************
*HtM Households
*******************************

*Set fortnightly household income
g pay = dinc/26

*Set household credit limit to one month of income (from Kaplan)
g credit_limit = dinc/12

* Hand-to-mouth flag. A household is flagged when liquid net worth is
* non-negative but no larger than half a pay period, or non-positive and no
* larger than half a pay period minus the credit limit.
* Stata treats missing as larger than any number, so a missing pay used to
* satisfy both ">= liqnw" comparisons and the household was flagged. The flag
* is now only evaluated when pay and liqnw are both observed and is left
* missing otherwise.
g htm=0 if !missing(pay, liqnw)
replace htm=1 if !missing(pay, liqnw) & (pay/2>=liqnw & liqnw>=0)
replace htm=1 if !missing(pay, liqnw) & (pay/2-credit_limit>=liqnw & liqnw<=0)


*family composition
* Household size as the sum of the four household-member counts.
gen hhsize = hhd0_4 + hhd5_9 + hhd1014 + hhadult

* singleparent is 1 for the listed hhfty codes, 0 otherwise.
gen singleparent = inlist(hhfty, 13, 14, 16, 17, 19, 20)

*life events 
* One 0/1 flag per life-event item, equal to 1 when the item equals 2.
* The two lists are matched position by position.
local events leprg leins leinf lertr lebth lemar lesep
local flags  pregnant illness fam_illness retire birth marry sep
local n_events : word count `events'
forvalues k = 1/`n_events' {
    local src : word `k' of `events'
    local dst : word `k' of `flags'
    gen `dst'_pastyear = (`src' == 2)
}


*annual expenditure 
* hxyhmrn with negative codes blanked, and its log.
gen exp_home = cond(hxyhmrn < 0, ., hxyhmrn)
gen ln_exp_home = ln(exp_home)

***********************************************
*Mortgages*
***********************************************
* Years at current address: negative codes blanked, then rounded to whole years.
g yearsataddress = hsyrcad
replace yearsataddress = . if yearsataddress<0
replace yearsataddress = round(yearsataddress,1)

* Year moved (negative codes blanked) and the number of years since then.
g yearmoved = mhyr
replace yearmoved=. if yearmoved<0
g yrs_since_move=year-yearmoved 


*Purchase
g purchyear = hsyr 
g purchprice = hsprice
g purchdebt = hsloana


* Negative codes: year and price become missing, debt becomes zero.
replace purchyear = . if purchyear < 0
replace purchprice = . if purchprice < 0
replace purchdebt = 0 if purchdebt < 0

* For each of the three purchase variables and each of the five listed years,
* take the value reported in that year and spread it to every observation of
* the same xwaveid (the max over a single reported value).
foreach i in purchyear purchprice purchdebt {
foreach j in 2002 2006 2010 2014 2018 {
g `i'`j' = `i' if year == `j'
bysort xwaveid: egen `i'`j'max = max(`i'`j') 
}
}


*There are some inconsistencies here but seems ok:
g purchyear_report = .
g purchprice_report = .
g purchdebt_report = .

* Fill the _report variables wave by wave: observations at or after the
* purchase year reported in a given wave take that wave's values. Waves are
* processed in ascending order, so a later wave overwrites an earlier one.
* Nothing is filled where that wave's purchase year is missing.
foreach i in purchyear purchprice purchdebt {
foreach j in 2002 2006 2010 2014 2018 {
replace `i'_report = `i'`j'max if year >= purchyear`j'max
}
}

* Remove the per-wave helper variables created above.
foreach i in purchyear purchprice purchdebt {
foreach j in 2002 2006 2010 2014 2018 {
drop `i'`j'max `i'`j'
}
}

*What to do about people that report no debt at origination?
* Diagnostic only: tabulate zero-debt cases, then discard the helper flags.
g nodebt = (purchdebt == 0)
tab nodebt
g nodebt2 = (purchdebt_report == 0)
tab nodebt2
drop nodebt*

* Years elapsed since the reported purchase year.
g purchduration =year-purchyear_report



*Refinancings

*Ever refinanced?
* 1 when hsevref equals 1, 0 otherwise, missing when hsevref is system missing.
g refinance = (hsevref==1)
replace refinance = . if hsevref == .

*Total value AFTER refinancing
g refdebt = hsrefvl
replace refdebt = . if refdebt < 0

* Year of refinancing, negative codes blanked.
g refyear = hsrefy
replace refyear = . if refyear < 0


* For each of the four listed years, take the value reported in that year and
* spread it to every observation of the same xwaveid.
foreach i in refyear refdebt {
foreach j in 2006 2010 2014 2018 {
g `i'`j' = `i' if year == `j'
bysort xwaveid: egen `i'`j'max = max(`i'`j') 
}
}

* Fill the _report variables wave by wave: observations at or after the
* refinance year reported in a given wave take that wave's values, with later
* waves overwriting earlier ones.
g refyear_report = .
g refdebt_report = .
foreach i in refyear refdebt {
foreach j in 2006 2010 2014 2018 {
replace `i'_report = `i'`j'max if year >= refyear`j'max
}
}

* Remove the per-wave helper variables created above.
foreach i in refyear refdebt {
foreach j in 2006 2010 2014 2018 {
drop `i'`j'max `i'`j'
}
}


* Years elapsed since the reported refinance year.
g refduration = year-refyear_report


*MORTGAGE DURATION
*This updates for time of refinance
* Shortest of the three available durations (missing ones are ignored);
* a duration of -1 is reset to 0.
gen pduration = min(purchduration, yearsataddress, refduration)
replace pduration = 0 if pduration == -1

* Loan amount reported in each of the five listed years, spread to every
* observation of the same xwaveid.
forvalues wave = 2002(4)2018 {
    gen hsloana`wave' = hsloana if year == `wave'
    bysort xwaveid: egen hsloana`wave'max = max(hsloana`wave')
}

* From each listed year onward, use the amount reported in that year; later
* years overwrite earlier ones. The per-year helpers are kept in the data.
gen hsloana_report = .
forvalues wave = 2002(4)2018 {
    replace hsloana_report = hsloana`wave'max if year >= `wave'
}

* Debt at origination: the refinanced amount when one is recorded, otherwise
* the debt reported at purchase; plus its log.
gen origdebt = cond(refdebt_report != ., refdebt_report, purchdebt_report)
gen lorigdebt = ln(origdebt)


*refinance in previous wealth modules*
* 1 when the household has refinanced and the refinance year is at most four
* years before the current year; 0 otherwise (including when refyear is missing).
gen refinance_recent = refinance == 1 & (year - refyear <= 4)


*years remaining on OO loans 
gen yrs_remain = hsmgyc
gen yrs_remain_second = hsslyc

* Loan age as 30 minus hsmgyc, kept only when hsmgyc lies between 0 and 30
* so that neither negative codes nor negative ages survive.
gen loan_age = 30 - hsmgyc if inrange(hsmgyc, 0, 30)

*capital city vs. the rest 
* Codes 11 and 21 of hhsgcc are flagged as sydney and melbourne, and
* expensive marks either of the two.
gen capcity = hhsgcc
gen sydney = capcity == 11
gen melbourne = capcity == 21
gen expensive = inlist(capcity, 11, 21)

* Liquidity ratio split by the expensive flag.
gen liqratio_capacity = liqratio if expensive == 1
gen liqratio_regional = liqratio if expensive == 0

*Identify the number of years since household become a first home buyer and the exact year their first purchase occured

* Negative codes are blanked BEFORE the fill. carryforward only fills missing
* values, so while the codes were still in place they blocked the fill and
* were only blanked afterwards, leaving those observations empty.
gen fhb_age=rpage
replace fhb_age=. if fhb_age<0

* backward decreases as year increases, so ordering on it within xwaveid runs
* from the latest year to the earliest and the fill goes back in time. The
* within-id order is stated explicitly in the by-prefix so it does not depend
* on how a previous sort left the data.
gen backward= 3000-year
bysort xwaveid (backward): carryforward fhb_age, replace

g yrs_since = age-fhb_age
label variable yrs_since "Years since first home purchase"
	
g fhb_year=year-yrs_since
label variable fhb_year  "Year first purchased home"

gen yrs_since_sq=yrs_since*yrs_since

gen retirement_age=rtage if rtage>0 & rtage!=.

gen yrs_since_retirement=age-retirement_age if retirement_age!=.

* firsthome: the derived first-purchase year is within one year of hsyr.
* The flag is then filled back in time within xwaveid, as above.
gen firsthome=1 if fhb_year-hsyr<2 & fhb_year-hsyr>-2 
bysort xwaveid (backward): carryforward firsthome, replace
sort xwaveid year 


*paying off mortgages

gen paidoff=(hsmgpd==1) // this variable is available every year 
replace paidoff=. if hsmgpd<0 

* Pay-off year: the year in which paidoff is 1 after being 0 the year before.
tsset xwaveid year
gen paidoff_year=year if paidoff==1 & l1.paidoff==0

* Fill forward in time, then back in time, within xwaveid. The within-id
* order is stated explicitly in each by-prefix (year, then backward) so the
* direction of the fill does not depend on how a previous sort left the data.
bysort xwaveid (year): carryforward paidoff_year, replace 
bysort xwaveid (backward): carryforward paidoff_year, replace 
sort xwaveid year


*years since pay off mortgages 
g yrs_since_paidoff=year-paidoff_year


*expected time to pay off mortgages 
* expect_paidoff_year is the number of years from the current year to the
* expected pay-off year (not a calendar year).
gen expect_paidoff=hsmgfin if hsmgfin >0 & hsmgfin!=.
gen expect_paidoff_year=expect_paidoff - year

*paying off mortgages *alternative measure , households that went from mortgagors to non-mortgagors 
* Note the naming: paidoff2 equals 1 while the household IS a mortgagor; the
* pay-off year is the year it switches from 1 to 0.
gen paidoff2= (mortgagor==1)
tsset xwaveid year
gen paidoff_year2=year if paidoff2==0 & l1.paidoff2==1

bysort xwaveid (year): carryforward paidoff_year2, replace 
bysort xwaveid (backward): carryforward paidoff_year2, replace 
sort xwaveid year

*years since paying off mortgages using alternative measure 
g yrs_since_paidoff2=year-paidoff_year2



*******************
*HOUSING PRICE
********************
***buffer & housing prices for mortgagors 

* Logs of the home value and of dinc.
gen ln_hprice = ln(hsvalui)
gen ln_dinc = ln(dinc)

* Home value relative to dinc. For renters the SA4-by-year mean value is used
* in place of an own home value.
gen hprice_to_dinc = cond(renter == 1, hsvalue_sa4_mean/dinc, hsvalui/dinc)

gen ln_hprice_sa4_mean = ln(hsvalue_sa4_mean)

* Period flag: 1 for years before 2010, 0 from 2010 onward.
gen before2010 = (year < 2010)

* Deciles computed separately within each period, then combined.
* NOTE(review): these deciles are taken on hsvalue, while the rest of this
* section uses hsvalui -- confirm which variable is intended.
xtile hprice_decile_before2010 = hsvalue if before2010 == 1, n(10)
xtile hprice_decile_after2010 = hsvalue if before2010 == 0, n(10)

gen hprice_decile_decade = max(hprice_decile_after2010, hprice_decile_before2010)



*********************************
*fixed/variable rate
*********************************
*rate type for first mortgage 
* 0 when hsmgfv equals 1 (fixed), 1 when it equals 2 or 3 (variable or combo),
* missing for any other value.
gen rate_type = (hsmgfv != 1) if inlist(hsmgfv, 1, 2, 3)

*rate type for second mortgage 
* Same coding, from hsslfv.
gen rate_type2 = (hsslfv != 1) if inlist(hsslfv, 1, 2, 3)

*io loans
* 1 when hsmgtl2 equals 2, 0 when it equals 1, missing otherwise.
gen io = (hsmgtl2 == 2) if inlist(hsmgtl2, 1, 2)



***************
* job perception 
**********************

*PQ: Percent chance of losing job in next 12 months
* Keep answers between 0 and 100 only, then rescale to a 0-1 probability.
gen ejobloss = jbmploj if inrange(jbmploj, 0, 100)
replace ejobloss = ejobloss/100


* risky: reported probability above 0.1; missing where the probability is missing.
gen risky = (ejobloss > 0.1) if ejobloss != .

*Expect to lose your job before mortgage is fully repaid 
* empduration is the reciprocal of the probability (missing when it is zero).
* loser is 1 when that duration is shorter than the years left to the expected
* pay-off, and missing when either the probability or the years left is missing.
gen empduration = 1/ejobloss
gen loser = (empduration - expect_paidoff_year < 0) if ejobloss != . & expect_paidoff_year != .

  
******************
*mortgage repayment
******************
*debt servicing ratio 
* Monthly repayment as a percentage of dinc/12, for mortgagors with a
* positive, non-missing repayment.
g dsr=hsmgi/(dinc/12)*100 if hsmgi>0 & hsmgi!=. & mortgagor==1 

*gearing ratio 
* Outstanding home-loan debt as a percentage of hsprice. hsprice still carries
* its negative codes at this point, which used to produce negative ratios; the
* ratio is now only computed for a positive, non-missing price.
g gearing= hdebt1/hsprice*100 if hsprice>0 & !missing(hsprice)

*usual mortgage repayment 
gen usual_mort=hsmga

* Convert to a monthly amount using the frequency code hsmgf:
* 1 -> times 4, 2 -> times 2, 3 or 4 -> unchanged, 5 -> divided by 3.
* NOTE(review): negative codes of hsmga are not blanked before the conversion.
gen usual_mort_month=usual_mort*4 if hsmgf==1
replace usual_mort_month=usual_mort*2 if hsmgf==2
replace usual_mort_month=usual_mort if hsmgf==3 | hsmgf==4
replace usual_mort_month=usual_mort/3 if hsmgf==5 

*minimum mortgage repayment 
* Same conversion, using the frequency code hsmgmf.
gen min_mort=hsmgmn 
gen min_mort_month=min_mort*4 if hsmgmf==1
replace min_mort_month=min_mort*2 if hsmgmf==2
replace min_mort_month=min_mort if hsmgmf==3 | hsmgmf==4
replace min_mort_month=min_mort/3 if hsmgmf==5 

//hsmgi is mortgage usual repayments per month; hssli is second mortgage usual repayments per month 

*income uncertainties 
* Copy of hiwsfei and its log.
gen winc = hiwsfei
gen lwinc = ln(winc)

* Employment indicators; each is 1 when the source equals the listed code.
gen emp = esbrd == 1

gen parttime = hges == 2
gen fulltime = hges == 1

gen permanent = jbmcnt == 3
gen fixedterm = jbmcnt == 1

* Job tenure (negative codes blanked) and its square.
gen jtenure = cond(jbempt < 0, ., jbempt)
gen jtenuresq = jtenure*jtenure 

gen occupation = cond(jbmo06 < 0, ., jbmo06)

* Workplace size category (negative codes blanked); small marks categories
* below 4 and is missing where size is system missing.
gen size = cond(jbmwpsz < 0, ., jbmwpsz)
gen small = (size < 4) if size != .

gen supervisor = jbmsvsr == 1

*INDUSTRY OF EMPLOYMENT  
gen division = cond(jbmi61 < 0, ., jbmi61)
gen industry = cond(jbmi62 < 0, ., jbmi62)


*Local unemployment
* Negative codes blanked, then rescaled by 100.
gen urate = hhura
replace urate = . if urate < 0
replace urate = urate/100

*Education 
* Bands of edhigh1: uni covers values above 0 and below 4, tafe above 3 and
* below 8, school above 5 and below 10. The tafe and school bands overlap for
* values 6 and 7.
gen uni = edhigh1 < 4 & edhigh1 > 0
gen tafe = edhigh1 < 8 & edhigh1 > 3
gen school = edhigh1 < 10 & edhigh1 > 5


*Health condition 
gen healthprob = hglth == 1
gen healthprob2 = helth == 1



